import os
import sqlite3
from contextlib import closing

import numpy as np
import pandas as pd
# Report the directory the interpreter was started from; the value is kept in
# a variable so it can be inspected again later.
current_working_directory = os.getcwd()
print("Current working directory: " + current_working_directory)
Current working directory: /Users/farihaislam/Documents/Machine learning and dsci bc udemy
import sqlite3
import pandas as pd
# Load the latitude/longitude of every record in the 'fires' table.
# closing() guarantees the connection is released even if the query raises;
# sqlite3's own `with conn:` form only scopes a transaction and would leave
# the connection open.
with closing(sqlite3.connect('/Users/farihaislam/Documents/Machine learning and dsci bc udemy/FPA_FOD_20170508.sqlite')) as conn:
    df = pd.read_sql_query("SELECT latitude, longitude FROM fires;", conn)
# Display the first few rows of the dataframe to confirm successful data retrieval
print(df.head())
LATITUDE LONGITUDE 0 40.036944 -121.005833 1 38.933056 -120.404444 2 38.984167 -120.735556 3 38.559167 -119.913333 4 38.559167 -119.933056
# Fire coordinates as plain NumPy arrays.
lats = df['LATITUDE'].values
lons = df['LONGITUDE'].values
# Bounding box of the United States as [lat, lon] corners.
# Unrounded alternative kept for reference:
#   lower-left  [24.356308, -124.848974]
#   upper-right [49.384358,  -66.885444]
bbox_ll = [24.0, -125.0]
bbox_ur = [50.0, -66.0]
# geographical center of united states
lat_0 = 39.833333
lon_0 = -98.583333
# nx is number of bins in x-axis, i.e. longitude
# ny is number of bins in y-axis, i.e. latitude
nx = 80
ny = 40
# n bins are delimited by n + 1 edges, so ask linspace for one extra point;
# asking for only n points would silently produce n - 1 bins.
lon_bins = np.linspace(bbox_ll[1], bbox_ur[1], nx + 1)
lat_bins = np.linspace(bbox_ll[0], bbox_ur[0], ny + 1)
# Count the fires per (lat, lon) cell; only the counts are used, the
# returned edge arrays are discarded.
density, _, _ = np.histogram2d(lats, lons, [lat_bins, lon_bins])
# 2-D mesh of the bin edges (longitude varies along columns, latitude along rows).
lon_bins_2d, lat_bins_2d = np.meshgrid(lon_bins, lat_bins)
# Append one zero row and one zero column so that density has the same
# shape as lat_bins_2d / lon_bins_2d; shading='gouraud' raises an error
# when the shapes differ.
density = np.pad(density, ((0, 1), (0, 1)), mode='constant')
pip install Cartopy
Requirement already satisfied: Cartopy in /Users/farihaislam/anaconda3/lib/python3.10/site-packages (0.22.0) Requirement already satisfied: pyproj>=3.1.0 in /Users/farihaislam/anaconda3/lib/python3.10/site-packages (from Cartopy) (3.6.1) Requirement already satisfied: packaging>=20 in /Users/farihaislam/anaconda3/lib/python3.10/site-packages (from Cartopy) (22.0) Requirement already satisfied: matplotlib>=3.4 in /Users/farihaislam/anaconda3/lib/python3.10/site-packages (from Cartopy) (3.7.0) Requirement already satisfied: numpy>=1.21 in /Users/farihaislam/anaconda3/lib/python3.10/site-packages (from Cartopy) (1.23.5) Requirement already satisfied: shapely>=1.7 in /Users/farihaislam/anaconda3/lib/python3.10/site-packages (from Cartopy) (2.0.3) Requirement already satisfied: pyshp>=2.1 in /Users/farihaislam/anaconda3/lib/python3.10/site-packages (from Cartopy) (2.3.1) Requirement already satisfied: kiwisolver>=1.0.1 in /Users/farihaislam/anaconda3/lib/python3.10/site-packages (from matplotlib>=3.4->Cartopy) (1.4.4) Requirement already satisfied: fonttools>=4.22.0 in /Users/farihaislam/anaconda3/lib/python3.10/site-packages (from matplotlib>=3.4->Cartopy) (4.25.0) Requirement already satisfied: contourpy>=1.0.1 in /Users/farihaislam/anaconda3/lib/python3.10/site-packages (from matplotlib>=3.4->Cartopy) (1.0.5) Requirement already satisfied: python-dateutil>=2.7 in /Users/farihaislam/anaconda3/lib/python3.10/site-packages (from matplotlib>=3.4->Cartopy) (2.8.2) Requirement already satisfied: cycler>=0.10 in /Users/farihaislam/anaconda3/lib/python3.10/site-packages (from matplotlib>=3.4->Cartopy) (0.11.0) Requirement already satisfied: pillow>=6.2.0 in /Users/farihaislam/anaconda3/lib/python3.10/site-packages (from matplotlib>=3.4->Cartopy) (9.4.0) Requirement already satisfied: pyparsing>=2.3.1 in /Users/farihaislam/anaconda3/lib/python3.10/site-packages (from matplotlib>=3.4->Cartopy) (3.0.9) Requirement already satisfied: certifi in 
/Users/farihaislam/anaconda3/lib/python3.10/site-packages (from pyproj>=3.1.0->Cartopy) (2023.11.17) Requirement already satisfied: six>=1.5 in /Users/farihaislam/anaconda3/lib/python3.10/site-packages (from python-dateutil>=2.7->matplotlib>=3.4->Cartopy) (1.16.0) Note: you may need to restart the kernel to use updated packages.
pip install scipy
Requirement already satisfied: scipy in /Users/farihaislam/anaconda3/lib/python3.10/site-packages (1.10.0) Requirement already satisfied: numpy<1.27.0,>=1.19.5 in /Users/farihaislam/anaconda3/lib/python3.10/site-packages (from scipy) (1.23.5) Note: you may need to restart the kernel to use updated packages.
import matplotlib.pyplot as plt
import numpy as np
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from scipy.ndimage import gaussian_filter
# Re-bin on a finer grid than the first pass for smoother-looking contours.
nx = 160  # number of longitude bins
ny = 80   # number of latitude bins
# `lats` and `lons` are the fire coordinates extracted earlier.
# n bins need n + 1 edges.
lon_bins = np.linspace(-125, -66, nx + 1)
lat_bins = np.linspace(24, 50, ny + 1)
# density has shape (ny, nx): rows follow latitude, columns follow longitude.
density, _, _ = np.histogram2d(lats, lons, [lat_bins, lon_bins])
# Create the map projection
projection = ccrs.AlbersEqualArea(central_longitude=lon_0, central_latitude=lat_0)
# Create figure and axis objects
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(1, 1, 1, projection=projection)
# Set the extent of the map
ax.set_extent([-125, -66, 24, 50], crs=ccrs.PlateCarree())
# Add land, coastlines, and state features
ax.add_feature(cfeature.LAND.with_scale('50m'))
ax.add_feature(cfeature.COASTLINE.with_scale('50m'))
ax.add_feature(cfeature.STATES.with_scale('50m'))
# Mesh of the bin *edges*. Passing only the lower edges with an
# equal-shaped colour array makes pcolormesh treat them as cell centres,
# which shifts every cell half a bin towards the south-west.
lon_bins_2d, lat_bins_2d = np.meshgrid(lon_bins, lat_bins)
# Smooth the raw counts with a Gaussian filter before plotting.
density_smoothed = gaussian_filter(density, sigma=2)
# shading='flat' draws one cell per (ny, nx) value between the
# (ny + 1, nx + 1) edge coordinates.
fire_plot = ax.pcolormesh(lon_bins_2d, lat_bins_2d, density_smoothed,
                          transform=ccrs.PlateCarree(),
                          cmap='coolwarm', shading='flat')
# Add a colorbar
cbar = plt.colorbar(fire_plot, ax=ax, orientation='vertical', pad=0.02, aspect=50)
cbar.set_label('Fire Density')
# Define gridline options and label formatting
gl = ax.gridlines(crs=ccrs.PlateCarree(), draw_labels=True, linewidth=1, color='gray', alpha=0.5, linestyle='--')
gl.top_labels = False
gl.right_labels = False
gl.xlines = True
gl.ylines = True
gl.xlabel_style = {'size': 15, 'color': 'gray'}
gl.ylabel_style = {'size': 15, 'color': 'gray'}
plt.show()
pip install bokeh
Requirement already satisfied: bokeh in /Users/farihaislam/anaconda3/lib/python3.10/site-packages (2.4.3) Requirement already satisfied: pillow>=7.1.0 in /Users/farihaislam/anaconda3/lib/python3.10/site-packages (from bokeh) (9.4.0) Requirement already satisfied: packaging>=16.8 in /Users/farihaislam/anaconda3/lib/python3.10/site-packages (from bokeh) (22.0) Requirement already satisfied: tornado>=5.1 in /Users/farihaislam/anaconda3/lib/python3.10/site-packages (from bokeh) (6.1) Requirement already satisfied: Jinja2>=2.9 in /Users/farihaislam/anaconda3/lib/python3.10/site-packages (from bokeh) (3.1.2) Requirement already satisfied: typing-extensions>=3.10.0 in /Users/farihaislam/anaconda3/lib/python3.10/site-packages (from bokeh) (4.4.0) Requirement already satisfied: PyYAML>=3.10 in /Users/farihaislam/anaconda3/lib/python3.10/site-packages (from bokeh) (6.0) Requirement already satisfied: numpy>=1.11.3 in /Users/farihaislam/anaconda3/lib/python3.10/site-packages (from bokeh) (1.23.5) Requirement already satisfied: MarkupSafe>=2.0 in /Users/farihaislam/anaconda3/lib/python3.10/site-packages (from Jinja2>=2.9->bokeh) (2.1.1) Note: you may need to restart the kernel to use updated packages.
import sqlite3
import pandas as pd
import numpy as np
import colorcet as cc
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, LogColorMapper
# Load position, size and state of every fire. closing() releases the
# connection as soon as the query finishes (or fails) instead of leaving it
# open for the rest of the session.
with closing(sqlite3.connect('/Users/farihaislam/Documents/Machine learning and dsci bc udemy/FPA_FOD_20170508.sqlite')) as cnx:
    df = pd.read_sql_query("SELECT LATITUDE, LONGITUDE, FIRE_SIZE, STATE FROM fires", cnx)
df.head(5)
| LATITUDE | LONGITUDE | FIRE_SIZE | STATE | |
|---|---|---|---|---|
| 0 | 40.036944 | -121.005833 | 0.10 | CA |
| 1 | 38.933056 | -120.404444 | 0.25 | CA |
| 2 | 38.984167 | -120.735556 | 0.10 | CA |
| 3 | 38.559167 | -119.913333 | 0.10 | CA |
| 4 | 38.559167 | -119.933056 | 0.10 | CA |
# Drop Alaska, Hawaii and Puerto Rico. The explicit .copy() makes `new` an
# independent frame, so the column assignments below cannot write through to
# `df` and pandas' chained-assignment warning does not have to be disabled
# globally.
new = df.loc[~df['STATE'].isin(['AK', 'HI', 'PR'])].copy()
# Snap every coordinate down to a 0.1-degree grid.
new['LATITUDE'] = np.floor(new['LATITUDE'] * 10) / 10
new['LONGITUDE'] = np.floor(new['LONGITUDE'] * 10) / 10
# Text key "<lat>-<lon>" identifying each grid cell.
new['LL_COMBO'] = new['LATITUDE'].map(str) + '-' + new['LONGITUDE'].map(str)
grouped = new.groupby(['LL_COMBO', 'LATITUDE', 'LONGITUDE'])
# Number of FIRE_SIZE entries (i.e. fires) per grid cell.
number_of_wf = grouped['FIRE_SIZE'].agg(['count']).reset_index()
number_of_wf.head(5)
| LL_COMBO | LATITUDE | LONGITUDE | count | |
|---|---|---|---|---|
| 0 | 24.5--81.7 | 24.5 | -81.7 | 1 |
| 1 | 24.6--81.3 | 24.6 | -81.3 | 2 |
| 2 | 24.6--81.4 | 24.6 | -81.4 | 184 |
| 3 | 24.6--81.5 | 24.6 | -81.5 | 43 |
| 4 | 24.6--81.6 | 24.6 | -81.6 | 11 |
# Mean FIRE_SIZE for every group in `grouped`, flattened back into ordinary
# columns so the group keys sit next to the 'mean' column.
mean_fire_size = grouped['FIRE_SIZE'].agg(['mean'])
size_of_wf = mean_fire_size.reset_index()
size_of_wf.head(5)
| LL_COMBO | LATITUDE | LONGITUDE | mean | |
|---|---|---|---|---|
| 0 | 24.5--81.7 | 24.5 | -81.7 | 0.500000 |
| 1 | 24.6--81.3 | 24.6 | -81.3 | 0.100000 |
| 2 | 24.6--81.4 | 24.6 | -81.4 | 0.501630 |
| 3 | 24.6--81.5 | 24.6 | -81.5 | 0.646512 |
| 4 | 24.6--81.6 | 24.6 | -81.6 | 0.954545 |
from bokeh.models import LogColorMapper, ColorBar
from bokeh.plotting import figure, show, output_notebook, ColumnDataSource
from bokeh.transform import transform
# `number_of_wf` is expected to provide 'count', 'LATITUDE' and 'LONGITUDE' columns.
# Custom fire-like palette running from black through red to yellow.
fire_palette = ['#000000', '#330000', '#660000', '#990000', '#CC0000', '#FF0000', '#FF3300', '#FF6600', '#FF9900', '#FFCC00', '#FFFF00']
# Log-scaled colour mapping spanning the observed range of counts.
# Series.min()/max() are used instead of the builtins so the reduction runs
# vectorised rather than element by element in Python.
wf_counts = number_of_wf['count']
color_mapper = LogColorMapper(palette=fire_palette, low=wf_counts.min(), high=wf_counts.max())
# Prepare the data source for Bokeh plotting
source = ColumnDataSource(number_of_wf)
# Bounds of the plot area, taken from the data itself.
lon_min, lon_max = number_of_wf['LONGITUDE'].min(), number_of_wf['LONGITUDE'].max()
lat_min, lat_max = number_of_wf['LATITUDE'].min(), number_of_wf['LATITUDE'].max()
# width/height are accepted by both Bokeh 2.x and 3.x, whereas the older
# plot_width/plot_height spellings were removed in Bokeh 3.
p1 = figure(title="Number of wildfires occurring from 1992 to 2015 (lighter color means more wildfires)",
            toolbar_location=None, width=600, height=400,
            x_range=(lon_min, lon_max), y_range=(lat_min, lat_max))
# Black canvas without grid lines or axes so only the points are visible.
p1.background_fill_color = "black"
p1.grid.grid_line_color = None
p1.axis.visible = False
# Marker size in screen units; adjust as needed.
point_size = 3
# One circle per grid cell, coloured by its wildfire count.
glyph = p1.circle('LONGITUDE', 'LATITUDE', source=source,
                  color=transform('count', color_mapper),
                  size=point_size)
# Add a color bar to the plot
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=12, border_line_color=None, location=(0,0))
p1.add_layout(color_bar, 'right')
# Show the plot
output_notebook()
show(p1)
from bokeh.models import LogColorMapper, ColorBar
from bokeh.plotting import figure, show, output_notebook, ColumnDataSource
from bokeh.transform import transform
# `size_of_wf` is expected to provide 'mean', 'LATITUDE' and 'LONGITUDE' columns.
# Log-scaled colour mapping spanning the observed range of mean fire sizes.
# Series.min()/max() skip NaN, unlike the builtins whose result with NaN
# present depends on element order.
mean_sizes = size_of_wf['mean']
size_color_mapper = LogColorMapper(palette=fire_palette, low=mean_sizes.min(), high=mean_sizes.max())
# Prepare the data source for Bokeh plotting
source_size = ColumnDataSource(size_of_wf)
# width/height are accepted by both Bokeh 2.x and 3.x, whereas the older
# plot_width/plot_height spellings were removed in Bokeh 3.
p2 = figure(title="Average size of wildfires occurring from 1992 to 2015 (lighter color means bigger fire)",
            toolbar_location=None, width=600, height=400)
# Black canvas without grid lines or axes so only the points are visible.
p2.background_fill_color = "black"
p2.grid.grid_line_color = None
p2.axis.visible = False
# One circle per grid cell, coloured by its mean fire size.
glyph = p2.circle('LONGITUDE', 'LATITUDE', source=source_size,
                  color=transform('mean', size_color_mapper),
                  size=1)
# Add a color bar to the plot
color_bar = ColorBar(color_mapper=size_color_mapper, label_standoff=12, border_line_color=None, location=(0,0))
p2.add_layout(color_bar, 'right')
# Show the plot
output_notebook()
show(p2)
from astropy.time import Time
import pandas as pd
import sqlite3
# Reload the fires together with their raw DISCOVERY_DATE values.
# closing() releases the connection even when the query raises, so a failed
# read does not leave the database file open.
with closing(sqlite3.connect('/Users/farihaislam/Documents/Machine learning and dsci bc udemy/FPA_FOD_20170508.sqlite')) as conn:
    df = pd.read_sql_query("SELECT DISCOVERY_DATE, LATITUDE, LONGITUDE, FIRE_SIZE, STATE FROM Fires;", conn)
# Interpret DISCOVERY_DATE as Julian dates (astropy format 'jd') and replace
# the column with the corresponding datetime values.
df['DISCOVERY_DATE'] = Time(df['DISCOVERY_DATE'], format='jd').to_datetime()
# This will print all column names in the DataFrame
print(df.columns)
Index(['DISCOVERY_DATE', 'LATITUDE', 'LONGITUDE', 'FIRE_SIZE', 'STATE'], dtype='object')
import matplotlib.pyplot as plt
# Calendar year of each discovery date; this is the grouping key below.
df['YEAR'] = df['DISCOVERY_DATE'].dt.year
fires_by_year = df.groupby('YEAR')
# Yearly trend 1: how many fires were recorded.
annual_wildfire_counts = fires_by_year.size()
# Yearly trend 2: mean FIRE_SIZE of the fires recorded.
annual_wildfire_size = fires_by_year['FIRE_SIZE'].mean()

# Line chart of the yearly fire counts on its own figure.
counts_ax = plt.figure(figsize=(12, 6)).gca()
annual_wildfire_counts.plot.line(ax=counts_ax)
counts_ax.set_title('Annual Wildfire Occurrences Over Time')
counts_ax.set_ylabel('Number of Wildfires')
counts_ax.set_xlabel('Year')
counts_ax.grid(True)
plt.show()

# Line chart of the yearly mean fire size on a second figure.
size_ax = plt.figure(figsize=(12, 6)).gca()
annual_wildfire_size.plot.line(ax=size_ax, color='orange')
size_ax.set_title('Annual Average Wildfire Size Over Time')
size_ax.set_ylabel('Average Size (acres)')
size_ax.set_xlabel('Year')
size_ax.grid(True)
plt.show()
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Set the aesthetic style of the plots
sns.set_theme(style="whitegrid")
# Define the month names to use as labels
month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# The monthly aggregates plotted below were not defined anywhere before this
# point, so derive them here from the discovery date. Both are indexed by
# month number (1-12).
df['MONTH'] = df['DISCOVERY_DATE'].dt.month
monthly_fire_count = df.groupby('MONTH').size()
monthly_fire_size_mean = df.groupby('MONTH')['FIRE_SIZE'].mean()


def _plot_monthly_bars(series, title, ylabel, bar_kwargs, trend_color=None):
    """Draw one bar chart of a month-indexed series.

    series      -- values indexed by month number (1-12).
    title       -- figure title.
    ylabel      -- y-axis label.
    bar_kwargs  -- extra keyword arguments forwarded to ``sns.barplot``.
    trend_color -- when given, a line with round markers in this colour is
                   drawn through the bar tops and a legend is added.
    """
    plt.figure(figsize=(12, 6))
    sns.barplot(x=series.index, y=series.values, **bar_kwargs)
    if trend_color is not None:
        # The bars are drawn at positions 0..11 while the index holds the
        # month numbers 1..12, hence the shift by one.
        sns.lineplot(x=series.index-1, y=series.values, marker='o', color=trend_color, label='Trend Line')
    plt.xticks(np.arange(12), month_names)  # Set custom month labels
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('Month')
    if trend_color is not None:
        plt.legend()
    plt.show()


# Seasonality of wildfire occurrences by month
_plot_monthly_bars(monthly_fire_count, 'Monthly Wildfire Occurrences',
                   'Number of Wildfires', {'palette': 'autumn'})
# Seasonality of average wildfire size by month
_plot_monthly_bars(monthly_fire_size_mean, 'Monthly Average Wildfire Size',
                   'Average Size (acres)', {'palette': 'autumn'})
# Monthly wildfire occurrences with a line overlaid on the bars
_plot_monthly_bars(monthly_fire_count, 'Monthly Wildfire Occurrences with Trend',
                   'Number of Wildfires',
                   {'color': 'lightcoral', 'alpha': 0.6, 'label': 'Occurrences'},
                   trend_color='red')
# Monthly average wildfire size with a line overlaid on the bars
_plot_monthly_bars(monthly_fire_size_mean, 'Monthly Average Wildfire Size with Trend',
                   'Average Size (acres)',
                   {'color': 'skyblue', 'alpha': 0.6, 'label': 'Average Size'},
                   trend_color='blue')
/Users/farihaislam/anaconda3/lib/python3.10/site-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
/Users/farihaislam/anaconda3/lib/python3.10/site-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
/Users/farihaislam/anaconda3/lib/python3.10/site-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
/Users/farihaislam/anaconda3/lib/python3.10/site-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
# The MONTH column is not created anywhere before this point, so derive it
# from the discovery date before grouping on it.
df['MONTH'] = df['DISCOVERY_DATE'].dt.month
# One row per month: number of fires and mean FIRE_SIZE.
monthly_data = df.groupby('MONTH').agg(count=('FIRE_SIZE', 'size'), average_size=('FIRE_SIZE', 'mean')).reset_index()
monthly_data['MONTH_NAME'] = monthly_data['MONTH'].apply(lambda x: month_names[x-1])

# Work on explicit axes objects. After twinx() the "current" axes is the twin,
# so pyplot-level calls (plt.ylabel, plt.gca) would label the wrong axis and
# collect the legend handles of only one of the two lines.
trend_fig, ax1 = plt.subplots(figsize=(14, 7))
# sort=False keeps the rows in calendar order; legend=False stops seaborn from
# adding a per-axes legend, since a single combined legend is built below.
sns.lineplot(data=monthly_data, x='MONTH_NAME', y='count', marker='o', color='blue',
             label='Number of Wildfires', sort=False, legend=False, ax=ax1)
ax2 = ax1.twinx()
sns.lineplot(data=monthly_data, x='MONTH_NAME', y='average_size', marker='o', color='orange',
             label='Average Size (acres)', sort=False, legend=False, ax=ax2)
ax1.set_title('Wildfire Trends: Occurrences and Average Size by Month')
ax1.set_xlabel('Month')
ax1.set_ylabel('Number of Wildfires')
ax2.set_ylabel('Average Size (acres)')
# Merge the handles of both y-axes into one legend.
lines, labels = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax2.legend(lines + lines2, labels + labels2, loc='upper left')
plt.show()
/Users/farihaislam/anaconda3/lib/python3.10/site-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
/Users/farihaislam/anaconda3/lib/python3.10/site-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
/Users/farihaislam/anaconda3/lib/python3.10/site-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
/Users/farihaislam/anaconda3/lib/python3.10/site-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):